import requests
from lxml import etree
import openpyxl

def urls(timeout=10):
    """Scrape the JD laptop brand filter and record each brand in the workbook.

    Downloads the JD laptop search page, takes at most the first 18 ``<li>``
    entries matched by the brand-selector XPath, and for each entry writes
    the brand title, the value returned by ``pages()`` for the brand URL,
    and the brand URL itself into columns A-C of the active sheet, starting
    at row 2. Row 1 receives the column headers.

    Relies on the module-level globals ``headers`` (request headers) and
    ``path`` (workbook location). The workbook at ``path`` must already
    exist, because it is opened with ``openpyxl.load_workbook``.

    Args:
        timeout: Seconds to wait for the search page before ``requests``
            raises, so a stalled connection cannot hang the crawl forever.
    """
    # Local import keeps this block self-contained.
    from urllib.parse import urljoin

    site_root = 'https://search.jd.com/'
    base_url = 'https://search.jd.com/search?keyword=%E7%AC%94%E8%AE%B0%E6%9C%AC%E7%94%B5%E8%84%91&wq=%E7%AC%94%E8%AE%B0%E6%9C%AC%E7%94%B5%E8%84%91&cid3=672&cid2=671'
    res = requests.get(url=base_url, headers=headers, timeout=timeout).text
    tree = etree.HTML(res)
    # Guard against the parser producing no root element (e.g. empty body),
    # in which case only the header row is written.
    if tree is None:
        names = []
    else:
        names = tree.xpath('//*[@id="J_selector"]/div[1]/div/div[2]/div[2]/ul/li')[:18]

    wb = openpyxl.load_workbook(path)
    ws = wb.active
    ws['A1'].value = '电脑品牌'
    ws['B1'].value = '总页数'
    ws['C1'].value = 'URL'

    # Data rows start at 2 because row 1 holds the headers; enumerate avoids
    # re-searching the list for every cell as names.index(i) did.
    for row, item in enumerate(names, start=2):
        name = ''.join(item.xpath('./a/@title'))
        href = ''.join(item.xpath('./a/@href'))
        # urljoin gives the same result as concatenation for plain relative
        # hrefs, and also copes with root-relative or absolute ones.
        url = urljoin(site_root, href)
        page = pages(url)
        ws[f'A{row}'].value = name
        ws[f'B{row}'].value = page
        ws[f'C{row}'].value = url
    wb.save(path)

def pages(url, timeout=10):
    """Return the page-count text shown on a JD search result page.

    Fetches ``url`` with the module-level ``headers`` and joins every text
    node matched by ``//*[@id="J_topPage"]/span/i/text()`` into one string.

    Args:
        url: Address of the search result page to inspect.
        timeout: Seconds to wait for the response before ``requests``
            raises, so a stalled connection cannot block forever.

    Returns:
        The concatenated matched text, or ``''`` when nothing matches or the
        response could not be parsed into a document.
    """
    res = requests.get(url=url, headers=headers, timeout=timeout).text
    tree = etree.HTML(res)
    # Guard against the parser producing no root element (e.g. empty body);
    # report it the same way as "no match" instead of failing on None.xpath.
    if tree is None:
        return ''
    return ''.join(tree.xpath('//*[@id="J_topPage"]/span/i/text()'))

def creat_sheet():
    """Create a new workbook at ``path`` holding one sheet named 'url_info'.

    Relies on the module-level global ``path``. Any existing file at that
    location is overwritten by the save.
    """
    # Workbook's first positional parameter is ``write_only``, not a file
    # name, so passing ``path`` there silently produced a write-only
    # workbook. Build a normal workbook and rename its default sheet, which
    # leaves 'url_info' as the only (and therefore active) sheet.
    wb = openpyxl.Workbook()
    wb.active.title = 'url_info'
    wb.save(path)

if __name__ == '__main__':
    # Both names below are deliberately module-level globals: creat_sheet(),
    # urls() and pages() read them directly rather than taking parameters.

    # Destination workbook for the scraped brand information.
    path = r'/crawler/JD/CRAW/url_info.xlsx'

    # Browser-like User-Agent sent with every request.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.101 Safari/537.36',
    }

    print('开始抓取url了！')
    # Create the workbook first; urls() opens it with load_workbook.
    creat_sheet()
    urls()
    print('抓取结束了！')